library(tidyverse)
library(tigris)
library(censusapi)
library(sf)
library(mapview)
library(plotly)
library(lehdr)
library(leaflet)
options(
tigris_class = "sf",
tigris_use_cache = T
)
Sys.setenv(CENSUS_KEY="10dcd73d7c043e91bac9fb8d3989cbff54b08790")
Using the LODES dataset and the list of essential workers to map the distribution of essential workers by block group in San Jose
First get the block groups in San Jose
scc_blockgroups <- block_groups("CA","Santa Clara", cb=F, progress_bar=F)
# Use tracts sent to us by San Jose
sj_tracts <- st_read("/Users/simonespeizer/pCloud Drive/Shared/SFBI/Data Library/San_Jose/CSJ_Census_Tracts/CSJ_Census_Tracts.shp") %>%
st_as_sf() %>%
st_transform(st_crs(scc_blockgroups))
## Reading layer `CSJ_Census_Tracts' from data source `/Users/simonespeizer/pCloud Drive/Shared/SFBI/Data Library/San_Jose/CSJ_Census_Tracts/CSJ_Census_Tracts.shp' using driver `ESRI Shapefile'
## Simple feature collection with 219 features and 9 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 6112856 ymin: 1869687 xmax: 6255982 ymax: 1996555
## epsg (SRID): 2227
## proj4string: +proj=lcc +lat_1=38.43333333333333 +lat_2=37.06666666666667 +lat_0=36.5 +lon_0=-120.5 +x_0=2000000.0001016 +y_0=500000.0001016001 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=us-ft +no_defs
sj_citycouncil_disticts <- st_read("/Users/simonespeizer/pCloud Drive/Shared/SFBI/Data Library/San_Jose/City Council Districts/CITY_COUNCIL_DISTRICTS.shp") %>%
st_as_sf() %>%
st_transform(st_crs(scc_blockgroups))
## Reading layer `CITY_COUNCIL_DISTRICTS' from data source `/Users/simonespeizer/pCloud Drive/Shared/SFBI/Data Library/San_Jose/City Council Districts/CITY_COUNCIL_DISTRICTS.shp' using driver `ESRI Shapefile'
## Simple feature collection with 10 features and 7 fields
## geometry type: POLYGON
## dimension: XY
## bbox: xmin: 6112856 ymin: 1869687 xmax: 6255982 ymax: 1996555
## epsg (SRID): 2227
## proj4string: +proj=lcc +lat_1=38.43333333333333 +lat_2=37.06666666666667 +lat_0=36.5 +lon_0=-120.5 +x_0=2000000.0001016 +y_0=500000.0001016001 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=us-ft +no_defs
# from code written by others to get SJ blockgroups
sj_blockgroups <-
scc_blockgroups %>%
st_centroid() %>%
st_join(sj_tracts, left = F) %>%
st_join(sj_citycouncil_disticts%>% dplyr::select(DISTRICTS)) %>%
mutate(
DISTRICTS = DISTRICTS %>% factor(levels = c("1","2","3","4","5","6","7","8","9","10"))
) %>%
st_set_geometry(NULL) %>%
left_join(scc_blockgroups%>% dplyr::select(GEOID), by = "GEOID") %>%
st_as_sf() %>%
dplyr::select(GEOID, DISTRICTS)
# the spatial join leaves off two blockgroups which are touching district 9. The following code assigns those to district 9
sj_blockgroups$DISTRICTS[is.na(sj_blockgroups$DISTRICTS)] <- 9
sj_boundary <-
places("CA", cb=F, progress_bar=F) %>%
filter(NAME == "San Jose")
Next obtain the distribution of workers from the LODES dataset
# define the LODES categories, from https://lehd.ces.census.gov/data/lodes/LODES7/LODESTechDoc7.3.pdf
LODES_variable <- c('CNS01', 'CNS02', 'CNS03', 'CNS04', 'CNS05', 'CNS05', 'CNS05', 'CNS06', 'CNS07', 'CNS07', 'CNS08', 'CNS08', 'CNS09', 'CNS10', 'CNS11', 'CNS12', 'CNS13', 'CNS14', 'CNS15', 'CNS16', 'CNS17', 'CNS18', 'CNS19', 'CNS20')
LODES_NAICS <- c('11', '21', '22', '23', '31', '32', '33', '42', '44', '45', '48', '49', '51', '52', '53', '54', '55', '56', '61', '62', '71', '72', '81', '92')
LODES_keys <- data.frame(LODES_variable, LODES_NAICS)
# get the LODES data
sj_rac <-
grab_lodes(
state = "ca",
year = 2017,
lodes_type = "rac",
job_type = "JT01",
segment = "S000",
state_part = "main",
agg_geo = "bg"
) %>%
left_join(sj_blockgroups, by = c("h_bg" = "GEOID")) %>%
filter(!is.na(DISTRICTS)) %>%
dplyr::select(-c(contains("CE"),contains("CA"),contains("CR"), contains("CT"),contains("CS"),contains("CD")))
Now we look at the breakdown of essential workers.
Method 1: I assume that the number of workers in each subtype (4 digit code) of each NAICS 2 digit code is uniform among the 4 digit codes listed for that subtype in determining how many “essential” workers that 2 digit code includes; this is an assumption that could be changed.
# here I will use the Delaware information on essential businesses for the moment
DE_essential <- read_csv('/Users/simonespeizer/pCloud Drive/Shared/SFBI/Data Library/Essential Designations/delaware_essential_designations.csv')
DE_essential_grouped <- DE_essential %>% mutate(NAICS_2_dig = substr(NAICS, 1, 2)) %>%
full_join(LODES_keys, by = c('NAICS_2_dig' = 'LODES_NAICS')) %>%
group_by(LODES_variable, Essential) %>%
summarize(Count = n()) %>% # get number of subcodes within each 2 digit code that are essential and nonessential
ungroup() %>%
mutate(Essential = replace_na(Essential, FALSE)) %>% # assume if not listed it is not essential
spread(Essential, Count, fill = 0) %>%
rename(c("Essential" = "TRUE", "Nonessential" = "FALSE")) %>%
mutate(totalCount = Essential + Nonessential, fracEssential = Essential / totalCount)
# note that the Delaware list didn't include 92, public administration - but I'm going to assume that's essential and manually enter that here
DE_essential_grouped$fracEssential[20] = 1.0
# here I assume that the number of workers in each subtype (4 digit code) of each NAICS 2 digit code is uniform among the 4 digit codes listed for that subtype in determining how many "essential" workers that 2 digit code includes; this is an assumption that could be changed.
sj_rac_essential <- sj_rac %>% gather("Category", "Number", CNS01:CNS20) %>%
left_join(DE_essential_grouped %>% dplyr::select(fracEssential, LODES_variable), by = c("Category" = "LODES_variable")) %>%
mutate(numEssential = fracEssential * Number) %>%
group_by(h_bg, C000) %>%
summarize(totalEssential = round(sum(numEssential), 0)) %>%
ungroup() %>%
mutate(fracEssential = totalEssential / C000)
Map of method 1
# set up palette
blue_pal <- colorNumeric(
palette = "Blues",
domain =
c(sj_rac_essential %>%
pull(fracEssential) %>%
unique())
)
map <- leaflet(data = sj_rac_essential) %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(
data =
sj_rac_essential %>%
left_join(sj_blockgroups, by = c("h_bg" = "GEOID")) %>%
st_as_sf() %>% st_set_crs(4326),
fillColor = ~blue_pal(fracEssential),
color = "white",
weight = 0.25,
fillOpacity = 0.5,
label = ~paste0(fracEssential,"% of workers that are essential"),
highlightOptions = highlightOptions(
weight = 2,
opacity = 1
)
)
map
Method 2: Only count as essential the 2 digit NAICS codes with >=80% of the 4 digit sub-codes marked as essential
DE_binary_essential <- DE_essential_grouped %>% mutate(binaryEssential = fracEssential >= 0.8)
sj_rac_essential_2 <- sj_rac %>% gather("Category", "Number", CNS01:CNS20) %>%
left_join(DE_binary_essential %>% dplyr::select(binaryEssential, LODES_variable), by = c("Category" = "LODES_variable")) %>%
filter(binaryEssential == TRUE) %>%
group_by(h_bg, C000) %>%
summarize(totalEssential = sum(Number)) %>%
ungroup() %>%
mutate(fracEssential = round((totalEssential / C000)*100, 1))
Map of method 2
# set up palette
blue_pal <- colorNumeric(
palette = "Blues",
domain =
c(sj_rac_essential_2 %>%
pull(fracEssential) %>%
unique())
)
map <- leaflet(data = sj_rac_essential_2) %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(
data =
sj_rac_essential_2 %>%
left_join(sj_blockgroups, by = c("h_bg" = "GEOID")) %>%
st_as_sf() %>% st_set_crs(4326),
fillColor = ~blue_pal(fracEssential),
color = "white",
weight = 0.25,
fillOpacity = 0.5,
label = ~paste0(fracEssential,"% of workers that are essential"),
highlightOptions = highlightOptions(
weight = 2,
opacity = 1
)
)
map